﻿/**
 * Figures out which words in a list most closely match a word.
 * @param {String} findThis_str - A string to try to match. This is typically the first few letters of a word being typed in by hand.
 * @param {Array} inThis_ary - The array of all known words that can be used as output guesses.
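 * @param {Number} requireFirstLetters - (Optional) How many leading characters of findThis_str a word must start with to avoid a heavy ranking penalty. Words that do not share this prefix are pushed below every word that does. This defaults to a value of 1 when omitted (or when 0 / a non-number is passed), and is capped at the length of findThis_str.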
 * @return {Array} - An array of indexes representing a sort order for the inThis_ary array with the best matches at the beginning and worst matches at the end.  The closest match is found with:  inThis_ary[ returned_ary[ 0 ] ]
 * 
 * @version 1.8
 * @date (2023-12-29)
 * @author Humbird0 <humbird0@gmail.com>
 * @license Creative Commons Attribution 4.0 License
 *
 * @example
 * // traces "bestMatch_str: bcde"  (the shortest word that contains "bcd" and starts with "b")
 * #include "functions/fuzzySearch.as"
 * var searchThese_ary = [ "abc", "abcdef", "bcdefg", "bcde", "bc-d", "b-c-d" ];
 * var findThis_str = "bcd";
 * var results_ary = fuzzySearch( findThis_str, searchThese_ary );
 * var bestMatch_str = searchThese_ary[ results_ary[0] ];
 * trace("bestMatch_str: "+bestMatch_str);
 */
/*	Version 1.8
1.8		CHANGE:		Added "requireFirstLetters" parameter to heavily favor words that start the same as the desired word
			CHANGE:		Added scoreCache to speed up score calculations for words that have already been scored
1.7		CHANGE:		Able to skip the first letter when guessing
1.6		FIX:  When inThis_ary is empty, return an empty array as the result.
						Output is predictable. Always returns an array, even when inputs are invalid.
1.5		FIX:	numbers are converted into strings,  so 1 will match with "1"
1.4		FIX: 	non-strings were being converted into strings,  so undefined would be read as the string "undefined" and match with letters like "f"
1.3		FIX:  b_isFullMatch was being calculated wrong
1.2		Catch invalid parameters.
1.1		Case is completely ignored.  Strings are always compared as lower-case.


USAGE: 
	var searchThese_ary = [
		"ab", 
		"abc", 
		"abcdef", 
		"bcdefg", 
		"bcde", 
		"1ab", 
		"2", 
		12, 
		2, 
		1, 
		9, 
		6, 
		96, 
		196, 
		"bc-d", 
		"b-cd", 
		"b-c-d", 
		"-b-c-d", 
		false, 
		true, 
		undefined, 
		null, 
		(1/0)
	];
	#include "functions/fuzzySearch.as"
	var findThis_str = "bcd";		// bcd
	var results_ary = fuzzySearch( findThis_str, searchThese_ary );
	var bestMatch_str = searchThese_ary[ results_ary[0] ];
	trace("bestMatch_str: "+bestMatch_str);
	
	var result_ary = new Array( searchThese_ary.length );
	for(var i=0; i<searchThese_ary.length; i++)			result_ary[ i ] = searchThese_ary[ results_ary[i] ];
	trace( result_ary );


DESCRIPTION: 
	fuzzySearch() takes a string and an array.  It searches for the string within the array of strings  (it'll treat every item as a string,  reading the number 17 as the word "17")
	fuzzySearch() returns an index-array.  The 1st item of it indicates the location of the best-match within the input-array.  (so output[0] contains the index-location of the best-matching item in the input-array )
	
NOTES: 
	exact-matches take priority over approximate matches
	exact-matches with fewer total characters take priority over exact-matches with extra characters  (prefixes and suffixes are penalized)
	approximate matches with fewer gaps between matching letters take priority over approximate matches with more gaps  (a "gap" is any non-matching character.  Such as searching for "bcd" within "bc-d")
*/



/**
 * Ranks every item of inThis_ary by how closely it matches findThis_str.
 *
 * Scoring (higher is better):
 *   - an item that contains findThis_str as a substring gets fullMatchBoost
 *   - otherwise it gets +1 for each letter of findThis_str found in order,  minus gapPenalty for each character skipped between two found letters
 *   - an item that does not start with the first "requireFirstLetters" characters of findThis_str loses startMismatchPenalty
 *   - equal scores are ordered by shorter item first,  then by original position
 * Numbers are compared as their string form.  Any other non-string item is ranked after all strings.
 * Comparison ignores case.  inThis_ary itself is never modified.
 *
 * @param {String} findThis_str - The text to look for.
 * @param {Array} inThis_ary - The candidate words.
 * @param {Number} requireFirstLetters - (Optional) Length of the prefix of findThis_str that an item must start with to avoid the penalty.  0, omitted, or non-numeric values fall back to 1.  Capped at findThis_str.length.
 * @return {Array} - Indexes into inThis_ary, best match first.  Always an array;  empty when the inputs are invalid or empty.
 */
function fuzzySearch( findThis_str, inThis_ary, requireFirstLetters ){
	var fullMatchBoost = 500;				// must beat any approximate score,  which can never exceed findThis_str.length  (so this assumes findThis_str is shorter than 500 chars)
	var startMismatchPenalty = 1000;	// bigger than fullMatchBoost,  so a wrong start always ranks below a correct start
	var gapPenalty = 0.1;						// (0.1 recommended)		searching for "bcd"		0.1-0.4 => b-c-d before abc		0.5-0.9 => abc before b-c-d		1.0 => each gap cancels-out a matching letter
	
	// validate the inputs before touching any of their properties
	if( !findThis_str )					return [];
	if( !inThis_ary )						return [];
	if( !inThis_ary.length )		return [];
	if( typeof(findThis_str) !== "string" ){
		trace("fuzzySearch ERROR:  findThis_str is not a string");
		return [];
	}
	if( !(inThis_ary instanceof Array) ){
		trace("fuzzySearch ERROR:  inThis_ary is not an array");
		return [];
	}
	
	var needle_str = findThis_str.toLowerCase();
	var prefixLength = Math.min(  Number( requireFirstLetters ) || 1,  findThis_str.length  );
	var needleStart_str = needle_str.substr( 0, prefixLength );
	
	
	// Approximate score:  how many letters of needle_str appear, in order, within fullItem
	function getMatchScore( fullItem ){
		var score = 0;
		var lastFoundAt = -1;
		var startAt = 0;
		for( var c = 0;  c < needle_str.length;  c++ ){
			var foundAt = fullItem.indexOf( needle_str.charAt( c ), startAt );
			if( foundAt === -1 )		continue;		// this letter is missing,  try the next one
			if( lastFoundAt !== -1 ){
				// penalize the characters skipped since the previous matched letter  (-1 because adjacent letters have no gap)
				var gaps = (foundAt - lastFoundAt) - 1;
				score -= ( gaps * gapPenalty );
			}
			score++;
			lastFoundAt = foundAt;
			// letters must be found in order,  so only search after this match from now on
			startAt = foundAt + 1;
		}// for:  each character of needle_str
		return score;
	}// getMatchScore()
	
	
	// Score every item exactly once.  The results are stored by index,  so no word-keyed cache is needed
	// (a word-keyed object would collide with inherited Object members for words like "constructor")
	var itemCount = inThis_ary.length;
	var index_ary = [];
	var isUsable_ary = [];
	var score_ary = [];
	var length_ary = [];
	for( var i = 0;  i < itemCount;  i++ ){
		index_ary[ i ] = i;
		var item = inThis_ary[ i ];
		// treat raw numbers as strings
		if( typeof(item) === "number" )		item = String( item );
		// ignore anything else that isn't a string
		if( typeof(item) !== "string" ){
			isUsable_ary[ i ] = false;
			continue;
		}
		var fullItem = item.toLowerCase();
		var score = ( fullItem.indexOf( needle_str ) > -1 )  ?  fullMatchBoost  :  getMatchScore( fullItem );
		if( fullItem.substr( 0, prefixLength ) !== needleStart_str )		score -= startMismatchPenalty;
		isUsable_ary[ i ] = true;
		score_ary[ i ] = score;
		length_ary[ i ] = fullItem.length;
	}// for:  each item
	
	
	// Sort the indexes  (not the items)  so the caller's array keeps its order
	index_ary.sort( function( a, b ){
		var a_isUsable = isUsable_ary[ a ];
		var b_isUsable = isUsable_ary[ b ];
		if( a_isUsable && !b_isUsable )		return -1;		// promote a  (because b is invalid)
		if( !a_isUsable && b_isUsable )		return 1;			// promote b  (because a is invalid)
		if( a_isUsable && b_isUsable ){
			if( score_ary[ a ] > score_ary[ b ] )		return -1;		// bigger scores go higher on the list
			if( score_ary[ a ] < score_ary[ b ] )		return 1;
			if( length_ary[ a ] < length_ary[ b ] )		return -1;		// on equal scores,  shorter items go higher on the list
			if( length_ary[ a ] > length_ary[ b ] )		return 1;
		}
		// complete tie  (or both invalid):  keep the original relative order so the output is deterministic
		return a - b;
	});// sort()
	
	return index_ary;
	
}// fuzzySearch()